# This code is to generate tables and figures for the spell analysis
# Created by Shuyi Qiu
# Generated on Mar. 2nd, 2024
# Last edited on Apr. 4th, 2024
# Dataset generated from spell_data_012324.R

# Settings ----
set.seed(27705)
setwd("/Users/RachelChiu/Documents/ReProj/PSID/spell")
library(tidyverse)
library(ggplot2)
library(psidread)
library(dplyr)
library(haven)
library(readxl)
library(data.table)
library(Amelia)
library(janitor)
library(mitools)
library(mice)
library(weights)
library(radiant)
library(miceadds)
library(sandwich)

# Keep 3 substantial stats and remove the leading 0s

rmlead0 <- function(x, digits) {
  x <- round(x, digits = digits)
  
  ifelse(x < 1 & x > -1,
         sub("0\\.", ".", format(x, scientific = FALSE)),  # Escape the dot because it's a special character in regex
         format(x, scientific = FALSE))
}

# Collapse imputed dataset ----
imp_long_df <- data.frame()
for (i in c(1:5)){
  cdoc_imp <- along.out$imputations[[i]] |> 
    clean_names() |> 
    mutate(.imp = i)
  
  cov_df <- cdoc_imp |> 
    filter(fu_year == T) |> 
    mutate(year = as.numeric(year),
           n_children = ifelse(n_children >= 8, 8, n_children),
           n_children_gt3 = ifelse(n_children >= 3, T, F),
           n_household = ifelse(n_children + n_adults >= 9, 9, n_children + n_adults)) |> 
    left_join(pov_thresholds |> filter(year == 2021) |> select(n_household, n_children, w_threshold, inc_threshold), by = c("n_household", "n_children")) |> 
    left_join(inflation_df %>% select(Year, Ratio21), by = c("year" = "Year")) |> 
    left_join(inflation_df |> select(Year, Ratio21) |> mutate(Year = Year + 1), by = c("year" = "Year"), suffix = c("_nw","_inc")) |> 
    mutate(nwp = ifelse((tassets - tdebts + homevalue - vehicles)*Ratio21_nw <= w_threshold, T, F),
           astp = ifelse((tassets + homevalue - vehicles) <= w_threshold, T, F),
           debtp = ifelse(astp == F & nwp == T, T, F),
           incp = ifelse(inc*Ratio21_inc <= inc_threshold, T, F)) |> 
    group_by(pid) |> 
    summarise(
      total_wave = sum(fu_year == T),
      
      n_children_gt3_prop = sum(n_children_gt3 == T)/total_wave,
      
      edu_hh_lths_prop = sum(hh_educ <= 12)/total_wave,
      edu_hh_hsc_prop = sum(hh_educ > 12 & hh_educ < 16)/total_wave,
      edu_hh_gtba_prop = sum(hh_educ >= 16)/total_wave,
      
      age_hh_lt35_prop = sum(hh_age < 35)/total_wave,
      age_hh_gt35_prop = sum(hh_age >= 35)/total_wave,
      
      poorhealth_hh_prop = sum(hh_poorhealth == 1)/total_wave,
      
      h_gender_f_prop = sum(hh_sex == "F")/total_wave,
      
      h_race_nhwhite_prop = sum(hh_racehisp == "White")/total_wave,
      h_race_nhblack_prop = sum(hh_racehisp == "Black")/total_wave,
      h_race_hispanic_prop = sum(hh_racehisp == "Hispanic")/total_wave,
      h_race_other_prop = sum(hh_racehisp == "Others")/total_wave,
      
      h_marriage_married_prop = sum(hh_marital == "Married")/total_wave,
      h_marriage_single_prop = sum(hh_marital == "Single")/total_wave,
      h_marriage_widowed_prop = sum(hh_marital == "Widowed")/total_wave,
      h_marriage_div_prop = sum(hh_marital == "Divorced or Separated")/total_wave,
      h_marriage_nonmarried_prop = sum(hh_marital != "Married")/total_wave,
      
      foodstamps_used_prop = sum(h_foodstamps == 1)/total_wave,
      
      with_gp_true_prop = sum(h_livewgp == 1)/total_wave,
      hh_parents_true_prop = sum(hh_parents == 1)/total_wave,
      hh_gp_true_prop = sum(hh_gp == 1)/total_wave,
      
      incp_prop = sum(incp == 1)/total_wave,
      
      pwt = mean(pwt)
    )
  
  pov_desc <- cdoc_imp |> 
    filter(fu_year == T) |> 
    mutate(year = as.numeric(year),
           n_children = ifelse(n_children >= 8, 8, n_children),
           n_household = ifelse(n_children + n_adults >= 9, 9, n_children + n_adults)) |> 
    left_join(pov_thresholds |> filter(year == 2021) |> select(n_household, n_children, w_threshold, inc_threshold), by = c("n_household", "n_children")) |> 
    left_join(inflation_df %>% select(Year, Ratio21), by = c("year" = "Year")) |> 
    left_join(inflation_df |> select(Year, Ratio21) |> mutate(Year = Year + 1), by = c("year" = "Year"), suffix = c("_nw","_inc")) |> 
    mutate(nwp = ifelse((tassets - tdebts + homevalue - vehicles)*Ratio21_nw <= w_threshold, T, F),
           astp = ifelse((tassets + homevalue - vehicles) <= w_threshold, T, F),
           debtp = ifelse(astp == F & nwp == T, T, F),
           incp = ifelse(inc*Ratio21_inc <= inc_threshold, T, F)) |> 
    group_by(pid, .imp) |> 
    summarise(
      num_nwp = sum(nwp),
      num_fu = n(),
      ever_nwp = ifelse(num_nwp > 0, T, F),
      pct_nwp = num_nwp/num_fu,
      num_incp = sum(incp),
      ever_incp = ifelse(num_incp > 0, T, F),
      pct_incp = num_incp/num_fu,
      num_astp = sum(astp),
      ever_astp = ifelse(num_astp > 0, T, F),
      pct_astp = num_astp/num_fu,
      num_debtp = sum(debtp),
      ever_debtp = ifelse(num_debtp > 0, T, F),
      pct_debtp = num_debtp/num_fu
    )
  
  nwpov_desc <- cdoc_imp |> 
    filter(fu_year == T) |> 
    mutate(year = as.numeric(year),
           n_children = ifelse(n_children >= 8, 8, n_children),
           n_household = ifelse(n_children + n_adults >= 9, 9, n_children + n_adults)) |> 
    left_join(pov_thresholds |> filter(year == 2021) |> select(n_household, n_children, w_threshold, inc_threshold), by = c("n_household", "n_children")) |> 
    left_join(inflation_df %>% select(Year, Ratio21), by = c("year" = "Year")) |> 
    left_join(inflation_df |> select(Year, Ratio21) |> mutate(Year = Year + 1), by = c("year" = "Year"), suffix = c("_nw","_inc")) |>  
    mutate(nwp = ifelse((tassets - tdebts + homevalue - vehicles)*Ratio21_nw <= w_threshold, T, F)) |> 
    #filter(indfid != 0) |> 
    group_by(pid) |> 
    mutate(grp = rleid(nwp)) |> 
    filter(nwp == TRUE) |> 
    group_by(pid, grp) |> 
    summarise(spell_length = n()) |> 
    group_by(pid) |> 
    summarise(longest_spell_nwp = max(spell_length),
              num_spell_nwp = n())
  
  incpov_desc <- cdoc_imp |> 
    filter(fu_year == T) |> 
    mutate(year = as.numeric(year),
           n_children = ifelse(n_children >= 8, 8, n_children),
           n_household = ifelse(n_children + n_adults >= 9, 9, n_children + n_adults)) |> 
    left_join(pov_thresholds |> filter(year == 2021) |> select(n_household, n_children, w_threshold, inc_threshold), by = c("n_household", "n_children")) |> 
    left_join(inflation_df %>% select(Year, Ratio21), by = c("year" = "Year")) |> 
    left_join(inflation_df |> select(Year, Ratio21) |> mutate(Year = Year + 1), by = c("year" = "Year"), suffix = c("_nw","_inc")) |>  
    mutate(incp = ifelse(inc*Ratio21_inc <= inc_threshold, T, F)) |> 
    #filter(indfid != 0) |> 
    group_by(pid) |> 
    mutate(grp = rleid(incp)) |> 
    filter(incp == TRUE) |> 
    group_by(pid, grp) |> 
    summarise(spell_length = n()) |> 
    group_by(pid) |> 
    summarise(longest_spell_incp = max(spell_length),
              num_spell_incp = n())
  
  astpov_desc <- cdoc_imp |> 
    filter(fu_year == T) |> 
    mutate(year = as.numeric(year),
           n_children = ifelse(n_children >= 8, 8, n_children),
           n_household = ifelse(n_children + n_adults >= 9, 9, n_children + n_adults)) |> 
    left_join(pov_thresholds |> filter(year == 2021) |> select(n_household, n_children, w_threshold, inc_threshold), by = c("n_household", "n_children")) |> 
    left_join(inflation_df %>% select(Year, Ratio21), by = c("year" = "Year")) |> 
    left_join(inflation_df |> select(Year, Ratio21) |> mutate(Year = Year + 1), by = c("year" = "Year"), suffix = c("_nw","_inc")) |>  
    mutate(astp = ifelse((tassets + homevalue - vehicles) <= w_threshold, T, F)) |> 
    #filter(indfid != 0) |> 
    group_by(pid) |> 
    mutate(grp = rleid(astp)) |> 
    filter(astp == TRUE) |> 
    group_by(pid, grp) |> 
    summarise(spell_length = n()) |> 
    group_by(pid) |> 
    summarise(longest_spell_astp = max(spell_length),
              num_spell_astp = n())
  
  debtpov_desc <- cdoc_imp |> 
    filter(fu_year == T) |> 
    mutate(year = as.numeric(year),
           n_children = ifelse(n_children >= 8, 8, n_children),
           n_household = ifelse(n_children + n_adults >= 9, 9, n_children + n_adults)) |> 
    left_join(pov_thresholds |> filter(year == 2021) |> select(n_household, n_children, w_threshold, inc_threshold), by = c("n_household", "n_children")) |> 
    left_join(inflation_df %>% select(Year, Ratio21), by = c("year" = "Year")) |> 
    left_join(inflation_df |> select(Year, Ratio21) |> mutate(Year = Year + 1), by = c("year" = "Year"), suffix = c("_nw","_inc")) |>  
    mutate(nwp = ifelse((tassets - tdebts + homevalue - vehicles)*Ratio21_nw <= w_threshold, T, F),
           astp = ifelse((tassets + homevalue - vehicles) <= w_threshold, T, F),
           debtp = ifelse(astp == F & nwp == T, T, F)) |> 
    #filter(indfid != 0) |> 
    group_by(pid) |> 
    mutate(grp = rleid(debtp)) |> 
    filter(debtp == TRUE) |> 
    group_by(pid, grp) |> 
    summarise(spell_length = n()) |> 
    group_by(pid) |> 
    summarise(longest_spell_debtp = max(spell_length),
              num_spell_debtp = n())
  
  nwpov_pct_byage <- cdoc_imp |> 
    filter(fu_year == T) |> 
    mutate(year = as.numeric(year),
           n_children = ifelse(n_children >= 8, 8, n_children),
           n_household = ifelse(n_children + n_adults >= 9, 9, n_children + n_adults)) |> 
    left_join(pov_thresholds |> filter(year == 2021) |> select(n_household, n_children, w_threshold, inc_threshold), by = c("n_household", "n_children")) |> 
    left_join(inflation_df %>% select(Year, Ratio21), by = c("year" = "Year")) |> 
    left_join(inflation_df |> select(Year, Ratio21) |> mutate(Year = Year + 1), by = c("year" = "Year"), suffix = c("_nw","_inc")) |>  
    mutate(nwp = ifelse((tassets - tdebts + homevalue - vehicles)*Ratio21_nw <= w_threshold, T, F),
           p_age = ifelse(p_age == 0 | p_age > 100, NA, p_age), 
           p_age_imp = entry_age + year - entry_year,
           p_age_comp = ifelse(is.na(p_age), p_age_imp, p_age),
           age_cat = as.factor(case_when(
             p_age_comp >= 0 & p_age_comp <= 6 ~ "0to6",
             p_age_comp > 6 & p_age_comp <= 12 ~ "6to12",
             p_age_comp > 12 ~ "12to18"
           ))) |> 
    group_by(pid, age_cat) |> 
    summarise(
      total_wave = n(),
      nwp_pct = sum(nwp)/total_wave 
    ) |> 
    select(-total_wave) |> 
    pivot_wider(
      names_from = age_cat,
      values_from = nwp_pct,
      names_prefix = "nwp_pct"
    )
  
  incpov_pct_byage <- cdoc_imp |> 
    filter(fu_year == T) |> 
    mutate(year = as.numeric(year),
           n_children = ifelse(n_children >= 8, 8, n_children),
           n_household = ifelse(n_children + n_adults >= 9, 9, n_children + n_adults)) |> 
    left_join(pov_thresholds |> filter(year == 2021) |> select(n_household, n_children, w_threshold, inc_threshold), by = c("n_household", "n_children")) |> 
    left_join(inflation_df %>% select(Year, Ratio21), by = c("year" = "Year")) |> 
    left_join(inflation_df |> select(Year, Ratio21) |> mutate(Year = Year + 1), by = c("year" = "Year"), suffix = c("_nw","_inc")) |>  
    mutate(incp = ifelse(inc*Ratio21_inc <= inc_threshold, T, F),
           p_age = ifelse(p_age == 0 | p_age > 100, NA, p_age), 
           p_age_imp = entry_age + year - entry_year,
           p_age_comp = ifelse(is.na(p_age), p_age_imp, p_age),
           age_cat = as.factor(case_when(
             p_age_comp >= 0 & p_age_comp <= 6 ~ "0to6",
             p_age_comp > 6 & p_age_comp <= 12 ~ "6to12",
             p_age_comp > 12 ~ "12to18"
           ))) |> 
    group_by(pid, age_cat) |> 
    summarise(
      total_wave = n(),
      incp_pct = sum(incp)/total_wave 
    ) |> 
    select(-total_wave) |> 
    pivot_wider(
      names_from = age_cat,
      values_from = incp_pct,
      names_prefix = "incp_pct"
    )
  
  astpov_pct_byage <- cdoc_imp |> 
    filter(fu_year == T) |> 
    mutate(year = as.numeric(year),
           n_children = ifelse(n_children >= 8, 8, n_children),
           n_household = ifelse(n_children + n_adults >= 9, 9, n_children + n_adults)) |> 
    left_join(pov_thresholds |> filter(year == 2021) |> select(n_household, n_children, w_threshold, inc_threshold), by = c("n_household", "n_children")) |> 
    left_join(inflation_df %>% select(Year, Ratio21), by = c("year" = "Year")) |> 
    left_join(inflation_df |> select(Year, Ratio21) |> mutate(Year = Year + 1), by = c("year" = "Year"), suffix = c("_nw","_inc")) |>  
    mutate(astp = ifelse((tassets + homevalue - vehicles) <= w_threshold, T, F),
           p_age = ifelse(p_age == 0 | p_age > 100, NA, p_age), 
           p_age_imp = entry_age + year - entry_year,
           p_age_comp = ifelse(is.na(p_age), p_age_imp, p_age),
           age_cat = as.factor(case_when(
             p_age_comp >= 0 & p_age_comp <= 6 ~ "0to6",
             p_age_comp > 6 & p_age_comp <= 12 ~ "6to12",
             p_age_comp > 12 ~ "12to18"
           ))) |> 
    group_by(pid, age_cat) |> 
    summarise(
      total_wave = n(),
      astp_pct = sum(astp)/total_wave 
    ) |> 
    select(-total_wave) |> 
    pivot_wider(
      names_from = age_cat,
      values_from = astp_pct,
      names_prefix = "astp_pct"
    )
  
  debtpov_pct_byage <- cdoc_imp |> 
    filter(fu_year == T) |> 
    mutate(year = as.numeric(year),
           n_children = ifelse(n_children >= 8, 8, n_children),
           n_household = ifelse(n_children + n_adults >= 9, 9, n_children + n_adults)) |> 
    left_join(pov_thresholds |> filter(year == 2021) |> select(n_household, n_children, w_threshold, inc_threshold), by = c("n_household", "n_children")) |> 
    left_join(inflation_df %>% select(Year, Ratio21), by = c("year" = "Year")) |> 
    left_join(inflation_df |> select(Year, Ratio21) |> mutate(Year = Year + 1), by = c("year" = "Year"), suffix = c("_nw","_inc")) |>  
    mutate(nwp = ifelse((tassets - tdebts + homevalue - vehicles)*Ratio21_nw <= w_threshold, T, F),
           astp = ifelse((tassets + homevalue - vehicles) <= w_threshold, T, F),
           debtp = ifelse(astp == F & nwp == T, T, F),
           p_age = ifelse(p_age == 0 | p_age > 100, NA, p_age), 
           p_age_imp = entry_age + year - entry_year,
           p_age_comp = ifelse(is.na(p_age), p_age_imp, p_age),
           age_cat = as.factor(case_when(
             p_age_comp >= 0 & p_age_comp <= 6 ~ "0to6",
             p_age_comp > 6 & p_age_comp <= 12 ~ "6to12",
             p_age_comp > 12 ~ "12to18"
           ))) |> 
    group_by(pid, age_cat) |> 
    summarise(
      total_wave = n(),
      debtp_pct = sum(debtp)/total_wave 
    ) |> 
    select(-total_wave) |> 
    pivot_wider(
      names_from = age_cat,
      values_from = debtp_pct,
      names_prefix = "debtp_pct"
    )
  
  nwpov_share_byage <- cdoc_imp |> 
    filter(fu_year == T) |> 
    mutate(year = as.numeric(year),
           n_children = ifelse(n_children >= 8, 8, n_children),
           n_household = ifelse(n_children + n_adults >= 9, 9, n_children + n_adults)) |> 
    left_join(pov_thresholds |> filter(year == 2021) |> select(n_household, n_children, w_threshold, inc_threshold), by = c("n_household", "n_children")) |> 
    left_join(inflation_df %>% select(Year, Ratio21), by = c("year" = "Year")) |> 
    left_join(inflation_df |> select(Year, Ratio21) |> mutate(Year = Year + 1), by = c("year" = "Year"), suffix = c("_nw","_inc")) |>  
    mutate(nwp = ifelse((tassets - tdebts + homevalue - vehicles)*Ratio21_nw <= w_threshold, T, F),
           p_age = ifelse(p_age == 0 | p_age > 100, NA, p_age), 
           p_age_imp = entry_age + year - entry_year,
           p_age_comp = ifelse(is.na(p_age), p_age_imp, p_age),
           age_cat = as.factor(case_when(
             p_age_comp >= 0 & p_age_comp <= 6 ~ "0to6",
             p_age_comp > 6 & p_age_comp <= 12 ~ "6to12",
             p_age_comp > 12 ~ "12to18"
           ))) |> 
    group_by(pid) |> 
    mutate(num_nwp = sum(nwp)) |> 
    group_by(pid, age_cat) |> 
    summarise(
      nwp_share = sum(nwp)/mean(num_nwp),
      nwp_share = ifelse(is.nan(nwp_share), 0, nwp_share)
    ) |> 
    pivot_wider(
      names_from = age_cat,
      values_from = nwp_share,
      names_prefix = "nwp_share"
    )
  
  incpov_share_byage <- cdoc_imp |> 
    filter(fu_year == T) |> 
    mutate(year = as.numeric(year),
           n_children = ifelse(n_children >= 8, 8, n_children),
           n_household = ifelse(n_children + n_adults >= 9, 9, n_children + n_adults)) |> 
    left_join(pov_thresholds |> filter(year == 2021) |> select(n_household, n_children, w_threshold, inc_threshold), by = c("n_household", "n_children")) |> 
    left_join(inflation_df %>% select(Year, Ratio21), by = c("year" = "Year")) |> 
    left_join(inflation_df |> select(Year, Ratio21) |> mutate(Year = Year + 1), by = c("year" = "Year"), suffix = c("_nw","_inc")) |>  
    mutate(incp = ifelse(inc*Ratio21_inc <= inc_threshold, T, F),
           p_age = ifelse(p_age == 0 | p_age > 100, NA, p_age), 
           p_age_imp = entry_age + year - entry_year,
           p_age_comp = ifelse(is.na(p_age), p_age_imp, p_age),
           age_cat = as.factor(case_when(
             p_age_comp >= 0 & p_age_comp <= 6 ~ "0to6",
             p_age_comp > 6 & p_age_comp <= 12 ~ "6to12",
             p_age_comp > 12 ~ "12to18"
           ))) |> 
    group_by(pid) |> 
    mutate(num_incp = sum(incp)) |> 
    group_by(pid, age_cat) |> 
    summarise(
      incp_share = sum(incp)/mean(num_incp),
      incp_share = ifelse(is.nan(incp_share), 0, incp_share)
    ) |> 
    pivot_wider(
      names_from = age_cat,
      values_from = incp_share,
      names_prefix = "incp_share"
    )
  
  astpov_share_byage <- cdoc_imp |> 
    filter(fu_year == T) |> 
    mutate(year = as.numeric(year),
           n_children = ifelse(n_children >= 8, 8, n_children),
           n_household = ifelse(n_children + n_adults >= 9, 9, n_children + n_adults)) |> 
    left_join(pov_thresholds |> filter(year == 2021) |> select(n_household, n_children, w_threshold, inc_threshold), by = c("n_household", "n_children")) |> 
    left_join(inflation_df %>% select(Year, Ratio21), by = c("year" = "Year")) |> 
    left_join(inflation_df |> select(Year, Ratio21) |> mutate(Year = Year + 1), by = c("year" = "Year"), suffix = c("_nw","_inc")) |>  
    mutate(astp = ifelse((tassets + homevalue - vehicles) <= w_threshold, T, F),
           p_age = ifelse(p_age == 0 | p_age > 100, NA, p_age), 
           p_age_imp = entry_age + year - entry_year,
           p_age_comp = ifelse(is.na(p_age), p_age_imp, p_age),
           age_cat = as.factor(case_when(
             p_age_comp >= 0 & p_age_comp <= 6 ~ "0to6",
             p_age_comp > 6 & p_age_comp <= 12 ~ "6to12",
             p_age_comp > 12 ~ "12to18"
           ))) |> 
    group_by(pid) |> 
    mutate(num_astp = sum(astp)) |> 
    group_by(pid, age_cat) |> 
    summarise(
      astp_share = sum(astp)/mean(num_astp),
      astp_share = ifelse(is.nan(astp_share), 0, astp_share)
    ) |> 
    pivot_wider(
      names_from = age_cat,
      values_from = astp_share,
      names_prefix = "astp_share"
    )
  
  debtpov_share_byage <- cdoc_imp |> 
    filter(fu_year == T) |> 
    mutate(year = as.numeric(year),
           n_children = ifelse(n_children >= 8, 8, n_children),
           n_household = ifelse(n_children + n_adults >= 9, 9, n_children + n_adults)) |> 
    left_join(pov_thresholds |> filter(year == 2021) |> select(n_household, n_children, w_threshold, inc_threshold), by = c("n_household", "n_children")) |> 
    left_join(inflation_df %>% select(Year, Ratio21), by = c("year" = "Year")) |> 
    left_join(inflation_df |> select(Year, Ratio21) |> mutate(Year = Year + 1), by = c("year" = "Year"), suffix = c("_nw","_inc")) |>  
    mutate(nwp = ifelse((tassets - tdebts + homevalue - vehicles)*Ratio21_nw <= w_threshold, T, F),
           astp = ifelse((tassets + homevalue - vehicles) <= w_threshold, T, F),
           debtp = ifelse(astp == F & nwp == T, T, F),
           p_age = ifelse(p_age == 0 | p_age > 100, NA, p_age), 
           p_age_imp = entry_age + year - entry_year,
           p_age_comp = ifelse(is.na(p_age), p_age_imp, p_age),
           age_cat = as.factor(case_when(
             p_age_comp >= 0 & p_age_comp <= 6 ~ "0to6",
             p_age_comp > 6 & p_age_comp <= 12 ~ "6to12",
             p_age_comp > 12 ~ "12to18"
           ))) |> 
    group_by(pid) |> 
    mutate(num_debtp = sum(debtp)) |> 
    group_by(pid, age_cat) |> 
    summarise(
      debtp_share = sum(debtp)/mean(num_debtp),
      debtp_share = ifelse(is.nan(debtp_share), 0, debtp_share)
    ) |> 
    pivot_wider(
      names_from = age_cat,
      values_from = debtp_share,
      names_prefix = "debtp_share"
    )
  
  cdoc_df <- cdoc_imp |> 
    filter(year == ms_year) |> 
    select(pid, clg_enrolled, hs_completed, pwt) |> 
    rename(pwt_ms = pwt)
  
  demo_df <- cdoc_imp |> 
    filter(year == entry_year) |> 
    select(pid, hh_racehisp, hh_age, hh_educ, hh_marital) |> 
    rename(hh_racehisp_entry = hh_racehisp,
           hh_age_entry = hh_age
    ) |> 
    mutate(hh_age_gt35_entry = ifelse(hh_age_entry > 35, T, F),
           hh_educ_entry = as.factor(case_when(
             hh_educ < 12 ~ "lths",
             hh_educ >= 12 & hh_educ < 16 ~ "hsc",
             hh_educ >= 16 ~ "gtba"
           )),
           hh_marital_entry = ifelse(hh_marital == "Married", T, F)) |> 
    select(pid, hh_racehisp_entry, hh_age_gt35_entry, hh_educ_entry, hh_marital_entry)
  
  imp_df <- sample_id |> 
    left_join(nwpov_desc, by = "pid") |> 
    left_join(incpov_desc, by = "pid") |> 
    left_join(astpov_desc, by = "pid") |> 
    left_join(debtpov_desc, by = "pid") |> 
    left_join(cdoc_df, by = "pid") |> 
    left_join(pov_desc, by = "pid") |> 
    left_join(cov_df, by = "pid") |> 
    left_join(cdoc_imp |> filter(year == entry_year) |> select(pid, is_first_born, p_gender), by = "pid") |> 
    left_join(nwpov_pct_byage, by = "pid") |> 
    left_join(nwpov_share_byage, by = "pid") |> 
    left_join(incpov_pct_byage, by = "pid") |> 
    left_join(incpov_share_byage, by = "pid") |> 
    left_join(astpov_pct_byage, by = "pid") |> 
    left_join(astpov_share_byage, by = "pid") |> 
    left_join(debtpov_pct_byage, by = "pid") |> 
    left_join(debtpov_share_byage, by = "pid") |> 
    left_join(demo_df, by = "pid") |> 
    mutate(longest_spell_nwp = ifelse(is.na(longest_spell_nwp), 0, longest_spell_nwp),
           longest_spell_incp = ifelse(is.na(longest_spell_incp), 0, longest_spell_incp),
           longest_spell_astp = ifelse(is.na(longest_spell_astp), 0, longest_spell_astp),
           longest_spell_debtp = ifelse(is.na(longest_spell_debtp), 0, longest_spell_debtp),
           num_spell_nwp = ifelse(is.na(num_spell_nwp), 0, num_spell_nwp),
           num_spell_incp = ifelse(is.na(num_spell_incp), 0, num_spell_incp),
           num_spell_astp = ifelse(is.na(num_spell_astp), 0, num_spell_astp),
           num_spell_debtp = ifelse(is.na(num_spell_debtp), 0, num_spell_debtp))
  
  imp_long_df <- imp_long_df %>%
    bind_rows(imp_df)
}

getImpList <- function(MIdata, impIDColumn){
  d <- list()
  imps <- max(MIdata[,impIDColumn])
  for(i in 1:imps){
    d[[i]] <- MIdata[MIdata[,impIDColumn]==i,]
  }
  return(d)
}

library(mitools)
temp <- imp_long_df |> 
  mutate(consec_nwp = factor(case_when(
    num_spell_nwp == 0 & num_nwp == 0 ~ "Never",
    num_spell_nwp == 1 & num_nwp == 1 ~ "One wave",
    num_spell_nwp == 1 & num_nwp > 1 & pct_nwp <= 0.5 ~ "Short consec",
    num_spell_nwp == 1 & num_nwp > 1 & pct_nwp > 0.5 ~ "Long consec", 
    num_spell_nwp > 1 & pct_nwp <= 0.5 ~ "Short fluc",
    num_spell_nwp > 1 & pct_nwp > 0.5 ~ "Long fluc"
  ), levels = c("Never","One wave","Short fluc","Short consec","Long fluc","Long consec")),
  consec_incp = factor(case_when(
    num_spell_incp == 0 & num_incp == 0 ~ "Never",
    num_spell_incp == 1 & num_incp == 1 ~ "One wave",
    num_spell_incp == 1 & num_incp > 1 & pct_incp <= 0.5 ~ "Short consec",
    num_spell_incp == 1 & num_incp > 1 & pct_incp > 0.5 ~ "Long consec", 
    num_spell_incp > 1 & pct_incp <= 0.5 ~ "Short fluc",
    num_spell_incp > 1 & pct_incp > 0.5 ~ "Long fluc"
  ), levels = c("Never","One wave","Short fluc","Short consec","Long fluc","Long consec")),
  consec_astp = factor(case_when(
    num_spell_astp == 0 & num_astp == 0 ~ "Never",
    num_spell_astp == 1 & num_astp == 1 ~ "One wave",
    num_spell_astp == 1 & num_astp > 1 & pct_astp <= 0.5 ~ "Short consec",
    num_spell_astp == 1 & num_astp > 1 & pct_astp > 0.5 ~ "Long consec", 
    num_spell_astp > 1 & pct_astp <= 0.5 ~ "Short fluc",
    num_spell_astp > 1 & pct_astp > 0.5 ~ "Long fluc"
  ), levels = c("Never","One wave","Short fluc","Short consec","Long fluc","Long consec")),
  consec_debtp = factor(case_when(
    num_spell_debtp == 0 & num_debtp == 0 ~ "Never",
    num_spell_debtp == 1 & num_debtp == 1 ~ "One wave",
    num_spell_debtp == 1 & num_debtp > 1 & pct_debtp <= 0.5 ~ "Short consec",
    num_spell_debtp == 1 & num_debtp > 1 & pct_debtp > 0.5 ~ "Long consec", 
    num_spell_debtp > 1 & pct_debtp <= 0.5 ~ "Short fluc",
    num_spell_debtp > 1 & pct_debtp > 0.5 ~ "Long fluc"
  ), levels = c("Never","One wave","Short fluc","Short consec","Long fluc","Long consec")))

temp$consec_nwp <- relevel(temp$consec_nwp, ref = "One wave")
temp$consec_incp <- relevel(temp$consec_incp, ref = "Never")
temp$consec_astp <- relevel(temp$consec_astp, ref = "Never")
temp$consec_debtp <- relevel(temp$consec_debtp, ref = "Never")
temp$hh_racehisp_entry <- relevel(temp$hh_racehisp_entry, ref = "White")
temp$hh_educ_entry <- relevel(temp$hh_educ_entry, ref = "lths")

temp <- temp |> 
  left_join(d |> select(pid, indfid, year) |> mutate(year = as.numeric(year)), by = c("pid", "entry_year" = "year"))

# temp <- temp |> filter(entry_age == 1)
new_imp <- imputationList(getImpList(temp, c(".imp")))


# Table 1. Descriptive Statistics ----
cont_varlist <- c("incp_prop","n_children_gt3_prop","h_gender_f_prop","poorhealth_hh_prop","with_gp_true_prop","num_nwp","pct_nwp","num_spell_nwp", "longest_spell_nwp",
                  "nwp_pct0to6", "nwp_pct6to12","nwp_pct12to18","nwp_share0to6","nwp_share6to12","nwp_share12to18",
                  "incp_pct0to6","incp_pct6to12","incp_pct12to18","incp_share0to6","incp_share6to12","incp_share12to18")
cat_varlist <- c("is_first_born","p_gender","hh_age_gt35_entry","hh_racehisp_entry","hh_educ_entry","hh_marital_entry","consec_nwp")


## Asset and debt poverty descriptive statistics
cont_varlist <- c("num_nwp","num_incp","num_astp","num_debtp",
                  "pct_nwp","pct_incp","pct_astp","pct_debtp",
                  "num_spell_nwp","num_spell_incp","num_spell_astp","num_spell_debtp",
                  "longest_spell_nwp","longest_spell_incp","longest_spell_astp","longest_spell_debtp",
                  "nwp_pct0to6","nwp_pct6to12","nwp_pct12to18",
                  "incp_pct0to6","incp_pct6to12","incp_pct12to18",
                  "astp_pct0to6","astp_pct6to12", "astp_pct12to18",
                  "debtp_pct0to6","debtp_pct6to12","debtp_pct12to18",
                  "nwp_share0to6","nwp_share6to12","nwp_share12to18",
                  "incp_share0to6","incp_share6to12","incp_share12to18",
                  "astp_share0to6","astp_share6to12","astp_share12to18",
                  "debtp_share0to6","debtp_share12to18","debtp_share6to12")
cat_varlist <- c("ever_nwp","ever_incp","ever_astp","ever_debtp",
                 "consec_nwp", "consec_incp","consec_astp","consec_debtp")




contdesc_df <- data.frame(varname = cont_varlist)

for (i in cont_varlist){
  res <- with(new_imp, expr = {weighted.mean(get(i), pwt, na.rm = TRUE)})
  theta_bar <- mean(unlist(res))
  contdesc_df[contdesc_df$varname == i,"sumstats"] <- theta_bar
  
  res <- with(new_imp, expr = {weighted.sd(get(i), pwt, na.rm = TRUE)})
  vw <- sum(unlist(res)^2)/length(res)
  
  res <- with(new_imp, expr = {(weighted.mean(get(i), pwt, na.rm = TRUE) - theta_bar)^2})
  vb <- sum(unlist(res))/(length(res) - 1)
  
  vt <- vw + vb + vb/length(res)
  contdesc_df[contdesc_df$varname == i,"std"] <- sqrt(vt)
}

catdesc_df <- data.frame()
for (i in cat_varlist){
  res <- with(new_imp, expr = {wpct(get(i), pwt) * 100})
  pcts <- tapply(unlist(res), names(unlist(res)), mean)
  pcts_df <- data.frame(
    varname = names(pcts),
    sumstats = pcts
  )
  catdesc_df <- catdesc_df |> 
    bind_rows(pcts_df)
}

res_df <- bind_rows(contdesc_df, catdesc_df)
write.csv(res_df, file = "temp.csv")

# Figure 1. Distribution of spells ----
pool_mean <- with(imp_long_df, by(imp_long_df, .imp, 
                                  function(x) c(weighted.mean(x$ever_nwp, w = x$pwt) * 100, 
                                                wpct(x$num_nwp, w = x$pwt) * 100,
                                                weighted.mean(x$pct_nwp, w = x$pwt),
                                                wpct(x$longest_spell_nwp, w = x$pwt) * 100,
                                                wpct(x$num_spell_nwp, w = x$pwt) * 100,
                                                weighted.mean(x$ever_incp, w = x$pwt)* 100,
                                                wpct(x$num_incp, w = x$pwt)* 100,
                                                weighted.mean(x$pct_incp, w = x$pwt),
                                                wpct(x$longest_spell_incp, w = x$pwt)* 100,
                                                wpct(x$num_spell_incp, w = x$pwt)* 100)))
nwp_summary_stats <- Reduce("+",pool_mean)/length(pool_mean)
write.csv(t(nwp_summary_stats), file = "temp.csv")

pool_mean <- with(temp, by(temp, .imp, 
                                  function(x) c(wpct(x$consec_nwp, w = x$pwt) * 100)))
nwp_summary_stats <- Reduce("+",pool_mean)/length(pool_mean)
nwp_summary_stats

pool_mean <- with(temp, by(temp, .imp, 
                           function(x) c(wpct(x$consec_incp, w = x$pwt) * 100)))
nwp_summary_stats <- Reduce("+",pool_mean)/length(pool_mean)
nwp_summary_stats



# Table 2. Educ ~ NWP ----
# fit <- with(data = new_imp, exp = glm(hs_completed ~ num_nwp + age_hh_lt35_prop + incp_prop, family = binomial, weights = pwt))
# summary(pool(fit)) |> 
#   mutate(star = case_when(
#     p.value < 0.001 ~ "***",
#     p.value >= 0.001 & p.value < 0.05 ~ "**",
#     p.value >= 0.05 & p.value < 0.1 ~ "*",
#     TRUE ~ ""
#   ))

reg_summary <- data.frame(term = NA)
for (i in c("num_nwp","pct_nwp","num_spell_nwp", "longest_spell_nwp","consec_nwp","ever_nwp")){
  for (j in c("hs_completed","clg_enrolled")){
    fit <- with(data = new_imp, exp = glm(formula = get(j) ~ get(i) + incp_prop + is_first_born + p_gender + n_children_gt3_prop  + hh_age_gt35_entry + hh_racehisp_entry + hh_educ_entry + hh_marital_entry + h_gender_f_prop + poorhealth_hh_prop + with_gp_true_prop, family = binomial, weights = pwt))
    reg_temp <- summary(pool(fit)) %>%
      mutate(
        estimate_star = case_when(
          p.value < 0.1 & p.value >= 0.05 ~ paste(as.character(rmlead0(estimate)), "*", sep = ""),
          p.value < 0.05 & p.value >= 0.01 ~ paste(as.character(rmlead0(estimate)), "**", sep = ""),
          p.value < 0.01 & p.value >= 0 ~ paste(as.character(rmlead0(estimate)), "***", sep = ""),
          TRUE ~ as.character(round(estimate,3))
        ),
        stdev = paste0("(",rmlead0(std.error),")")
      ) %>%
      select(term, estimate_star, stdev)
    reg_summary <- reg_summary %>%
      full_join(reg_temp, by = "term", suffix = c("",i))
  }
}
write.csv(reg_summary, file = "temp.csv")




# Table 2. (Clustered) Educ ~ NWP ----
datlist <- miceadds::mids2datlist(new_imp)
save(datlist, file = "AddData/foreg.RData")
reg_summary <- data.frame(term = NA, name = NA)
for (i in c("num_nwp","pct_nwp","num_spell_nwp", "longest_spell_nwp","consec_nwp","ever_nwp")){
  for (j in c("hs_completed","clg_enrolled")){
    fit <- with(datlist, glm(formula = get(j) ~ get(i) + incp_prop + is_first_born + p_gender + n_children_gt3_prop  + hh_age_gt35_entry + hh_racehisp_entry + hh_educ_entry + hh_marital_entry + h_gender_f_prop + poorhealth_hh_prop + with_gp_true_prop, family = gaussian, weights = pwt))
    betas <- lapply(fit, coef)
    vars <- lapply(fit, FUN = function(x){vcovCL(x, cluster = datlist$imputations[[1]]$indfid)}) #vcovCL is in the sandwich package
    reg_temp <- summary(pool_mi(betas, vars)) %>%
      mutate(
        estimate_star = case_when(
          p < 0.1 & p >= 0.05 ~ paste(as.character(rmlead0(results,4)), "*", sep = ""),
          p < 0.05 & p >= 0.01 ~ paste(as.character(rmlead0(results,4)), "**", sep = ""),
          p < 0.01 & p >= 0 ~ paste(as.character(rmlead0(results,4)), "***", sep = ""),
          TRUE ~ as.character(rmlead0(results,4))
        ),
        stdev = paste0("(",rmlead0(se,4),")")
      ) %>%
      select(estimate_star, stdev)
    reg_temp$term <- rownames(reg_temp)
    reg_temp <- reg_temp |> pivot_longer(cols = c("estimate_star","stdev"))
    reg_temp <- as.data.frame(reg_temp)
    reg_summary <- reg_summary %>%
      full_join(reg_temp, by = c("term","name"), suffix = c("",paste0(i,j)))
  }
}

# write.csv(reg_summary, file = "temp.csv",quote = TRUE)

write.table(reg_summary, "temp.txt", sep="\t", row.names=FALSE, col.names=TRUE, quote=TRUE)


## Table 2.a Simple Regression----
reg_summary <- data.frame(term = NA)
for (i in c("num_nwp","pct_nwp","num_spell_nwp", "longest_spell_nwp","consec")){
  for (j in c("hs_completed","clg_enrolled")){
    fit <- with(data = new_imp, exp = glm(get(j) ~ get(i), family = binomial, weights = pwt))
    reg_temp <- summary(pool(fit)) %>%
      mutate(
        estimate_star = case_when(
          p.value < 0.1 & p.value >= 0.05 ~ paste(as.character(round(estimate,3)), "*", sep = ""),
          p.value < 0.05 & p.value >= 0.01 ~ paste(as.character(round(estimate,3)), "**", sep = ""),
          p.value < 0.01 & p.value >= 0 ~ paste(as.character(round(estimate,3)), "***", sep = ""),
          TRUE ~ as.character(round(estimate,3))
        ),
        stdev = paste0("(",round(std.error, 3),")")
      ) %>%
      select(term, estimate_star, stdev)
    reg_summary <- reg_summary %>%
      full_join(reg_temp, by = "term", suffix = c("",i))
  }
}
write.csv(reg_summary, file = "temp.csv")

## Table 2.b Adjusted Regression----
reg_summary <- data.frame(term = NA)
for (i in c("num_nwp","pct_nwp","num_spell_nwp", "longest_spell_nwp","consec")){
  for (j in c("hs_completed","clg_enrolled")){
    fit <- with(data = new_imp, exp = glm(get(j) ~ get(i) + is_first_born + p_gender, family = binomial, weights = pwt))
    reg_temp <- summary(pool(fit)) %>%
      mutate(
        estimate_star = case_when(
          p.value < 0.1 & p.value >= 0.05 ~ paste(as.character(round(estimate,3)), "*", sep = ""),
          p.value < 0.05 & p.value >= 0.01 ~ paste(as.character(round(estimate,3)), "**", sep = ""),
          p.value < 0.01 & p.value >= 0 ~ paste(as.character(round(estimate,3)), "***", sep = ""),
          TRUE ~ as.character(round(estimate,3))
        ),
        stdev = paste0("(",round(std.error, 3),")")
      ) %>%
      select(term, estimate_star, stdev)
    reg_summary <- reg_summary %>%
      full_join(reg_temp, by = "term", suffix = c("",i))
  }
}
write.csv(reg_summary, file = "temp.csv")


# Figure X. Spell of NWP, by parental age ----
imp_long_df <- imp_long_df |> 
  left_join(forimp |> filter(year == entry_year) |> select(pid, hh_age_gt35) |> rename(hh_age_gt35_entry = hh_age_gt35), by = "pid")

pool_mean <- with(imp_long_df |> filter(hh_age_gt35_entry == 1), by(imp_long_df|> filter(hh_age_gt35_entry == 1), .imp, 
                                  function(x) c(weighted.mean(x$ever_nwp, w = x$pwt) * 100, 
                                                wpct(x$num_nwp, w = x$pwt) * 100,
                                                weighted.mean(x$pct_nwp, w = x$pwt),
                                                wpct(x$longest_spell_nwp, w = x$pwt) * 100,
                                                wpct(x$num_spell_nwp, w = x$pwt) * 100)))

pool_mean <- with(imp_long_df |> filter(hh_age_gt35_entry == 1), by(imp_long_df|> filter(hh_age_gt35_entry == 1), .imp, 
                                                                    function(x) c(weighted.mean(x$ever_incp, w = x$pwt)* 100,
                                                                                  wpct(x$num_incp, w = x$pwt)* 100,
                                                                                  weighted.mean(x$pct_incp, w = x$pwt),
                                                                                  wpct(x$longest_spell_incp, w = x$pwt)* 100,
                                                                                  wpct(x$num_spell_incp, w = x$pwt)* 100)))                 
                  


pool_mean

nwp_summary_stats <- Reduce("+",pool_mean)/length(pool_mean)
write.csv(t(nwp_summary_stats), file = "temp.csv")

# Table 3. By age ----
reg_summary <- data.frame(term = NA)
for (j in c("hs_completed","clg_enrolled")){
  fit <- with(data = new_imp, exp = glm(get(j) ~ nwp_pct0to6 + nwp_pct6to12 + nwp_pct12to18 + incp_prop + is_first_born + p_gender + n_children_gt3_prop  + hh_age_gt35_entry + hh_racehisp_entry + hh_educ_entry + hh_marital_entry + h_gender_f_prop + poorhealth_hh_prop + with_gp_true_prop, family = gaussian, weights = pwt))
  reg_temp <- summary(pool(fit)) %>%
    mutate(
      estimate_star = case_when(
        p.value < 0.1 & p.value >= 0.05 ~ paste(as.character(round(estimate,3)), "*", sep = ""),
        p.value < 0.05 & p.value >= 0.01 ~ paste(as.character(round(estimate,3)), "**", sep = ""),
        p.value < 0.01 & p.value >= 0 ~ paste(as.character(round(estimate,3)), "***", sep = ""),
        TRUE ~ as.character(round(estimate,3))
      ),
      stdev = paste0("(",round(std.error, 3),")")
    ) %>%
    select(term, estimate_star, stdev)
  reg_summary <- reg_summary %>%
    full_join(reg_temp, by = "term", suffix = c("",j))
}
write.csv(reg_summary, file = "temp.csv")

# Share of poverty waves in each developmental stage
reg_summary <- data.frame(term = NA)
for (j in c("hs_completed","clg_enrolled")){
  fit <- with(data = new_imp, exp = glm(get(j) ~ nwp_share0to6 + nwp_share6to12 + nwp_share12to18 + incp_prop + is_first_born + p_gender + n_children_gt3_prop  + hh_age_gt35_entry + hh_racehisp_entry + hh_educ_entry + hh_marital_entry + h_gender_f_prop + poorhealth_hh_prop + with_gp_true_prop, family = binomial, weights = pwt))
  reg_temp <- summary(pool(fit)) %>%
    mutate(
      estimate_star = case_when(
        p.value < 0.1 & p.value >= 0.05 ~ paste(as.character(round(estimate,3)), "*", sep = ""),
        p.value < 0.05 & p.value >= 0.01 ~ paste(as.character(round(estimate,3)), "**", sep = ""),
        p.value < 0.01 & p.value >= 0 ~ paste(as.character(round(estimate,3)), "***", sep = ""),
        TRUE ~ as.character(round(estimate,3))
      ),
      stdev = paste0("(",round(std.error, 3),")")
    ) %>%
    select(term, estimate_star, stdev)
  reg_summary <- reg_summary %>%
    full_join(reg_temp, by = "term", suffix = c("",j))
}
write.csv(reg_summary, file = "temp.csv")

# Table 3. (Clustered) By age ----
datlist <- miceadds::mids2datlist(new_imp)
reg_summary <- data.frame(term = NA, name = NA)

  for (j in c("hs_completed","clg_enrolled")){
    fit <- with(datlist, glm(formula = get(j) ~ nwp_pct0to6 + nwp_pct6to12 + nwp_pct12to18 + incp_prop + is_first_born + p_gender + n_children_gt3_prop  + hh_age_gt35_entry + hh_racehisp_entry + hh_educ_entry + hh_marital_entry + h_gender_f_prop + poorhealth_hh_prop + with_gp_true_prop, family = gaussian, weights = pwt))
    betas <- lapply(fit, coef)
    vars <- lapply(fit, FUN = function(x){vcovCL(x, cluster = datlist$imputations[[1]]$indfid)}) #vcovCL is in the sandwich package
    reg_temp <- summary(pool_mi(betas, vars)) %>%
      mutate(
        estimate_star = case_when(
          p < 0.1 & p >= 0.05 ~ paste(as.character(rmlead0(results,4)), "*", sep = ""),
          p < 0.05 & p >= 0.01 ~ paste(as.character(rmlead0(results,4)), "**", sep = ""),
          p < 0.01 & p >= 0 ~ paste(as.character(rmlead0(results,4)), "***", sep = ""),
          TRUE ~ as.character(rmlead0(results,4))
        ),
        stdev = paste0("(",rmlead0(se,4),")")
      ) %>%
      select(estimate_star, stdev)
    reg_temp$term <- rownames(reg_temp)
    reg_temp <- reg_temp |> pivot_longer(cols = c("estimate_star","stdev"))
    reg_temp <- as.data.frame(reg_temp)
    reg_summary <- reg_summary %>%
      full_join(reg_temp, by = c("term","name"), suffix = c("",paste0(j)))
  }


# write.csv(reg_summary, file = "temp.csv")

write.table(reg_summary, "temp.txt", sep="\t", row.names=FALSE, col.names=TRUE, quote=TRUE)

reg_summary <- data.frame(term = NA, name = NA)
for (j in c("hs_completed","clg_enrolled")){
  fit <- with(datlist, glm(formula = get(j) ~ nwp_share0to6 + nwp_share6to12 + nwp_share12to18 + incp_prop + is_first_born + p_gender + n_children_gt3_prop  + hh_age_gt35_entry + hh_racehisp_entry + hh_educ_entry + hh_marital_entry + h_gender_f_prop + poorhealth_hh_prop + with_gp_true_prop, family = gaussian, weights = pwt))
  betas <- lapply(fit, coef)
  vars <- lapply(fit, FUN = function(x){vcovCL(x, cluster = datlist$imputations[[1]]$indfid)}) #vcovCL is in the sandwich package
  reg_temp <- summary(pool_mi(betas, vars)) %>%
    mutate(
      estimate_star = case_when(
        p < 0.1 & p >= 0.05 ~ paste(as.character(rmlead0(results,4)), "*", sep = ""),
        p < 0.05 & p >= 0.01 ~ paste(as.character(rmlead0(results,4)), "**", sep = ""),
        p < 0.01 & p >= 0 ~ paste(as.character(rmlead0(results,4)), "***", sep = ""),
        TRUE ~ as.character(rmlead0(results,4))
      ),
      stdev = paste0("(",rmlead0(se,4),")")
    ) %>%
    select(estimate_star, stdev)
  reg_temp$term <- rownames(reg_temp)
  reg_temp <- reg_temp |> pivot_longer(cols = c("estimate_star","stdev"))
  reg_temp <- as.data.frame(reg_temp)
  reg_summary <- reg_summary %>%
    full_join(reg_temp, by = c("term","name"), suffix = c("",paste0(j)))
}


# write.csv(reg_summary, file = "temp.csv")

write.table(reg_summary, "temp.txt", sep="\t", row.names=FALSE, col.names=TRUE, quote=TRUE)


# Table 3. (Revised Share) Condition on Poverty ----
datlist <- miceadds::mids2datlist(new_imp)
# Conditional on those who ever experienced poverty for at least one wave
datlist_sub  <- subset_datlist(datlist,  expr_subset=expression(nwp_share0to6 + nwp_share6to12 + nwp_share12to18 > 0) )
reg_summary <- data.frame(term = NA, name = NA)

for (j in c("hs_completed","clg_enrolled")){
  fit <- with(datlist_sub, glm(formula = get(j) ~ nwp_share6to12 + nwp_share12to18 + incp_prop + is_first_born + p_gender + n_children_gt3_prop  + hh_age_gt35_entry + hh_racehisp_entry + hh_educ_entry + hh_marital_entry + h_gender_f_prop + poorhealth_hh_prop + with_gp_true_prop, family = gaussian, weights = pwt))
  betas <- lapply(fit, coef)
  vars <- lapply(fit, FUN = function(x){vcovCL(x, cluster = datlist_sub$imputations[[1]]$indfid)}) #vcovCL is in the sandwich package
  reg_temp <- summary(pool_mi(betas, vars)) %>%
    mutate(
      estimate_star = case_when(
        p < 0.1 & p >= 0.05 ~ paste(as.character(rmlead0(results,4)), "*", sep = ""),
        p < 0.05 & p >= 0.01 ~ paste(as.character(rmlead0(results,4)), "**", sep = ""),
        p < 0.01 & p >= 0 ~ paste(as.character(rmlead0(results,4)), "***", sep = ""),
        TRUE ~ as.character(rmlead0(results,4))
      ),
      stdev = paste0("(",rmlead0(se,4),")")
    ) %>%
    select(estimate_star, stdev)
  reg_temp$term <- rownames(reg_temp)
  reg_temp <- reg_temp |> pivot_longer(cols = c("estimate_star","stdev"))
  reg_temp <- as.data.frame(reg_temp)
  reg_summary <- reg_summary %>%
    full_join(reg_temp, by = c("term","name"), suffix = c("",paste0(j)))
}


# write.csv(reg_summary, file = "temp.csv")

write.table(reg_summary, "temp.txt", sep="\t", row.names=FALSE, col.names=TRUE, quote=TRUE)

# Table 4. Income Mirror ----
reg_summary <- data.frame(term = NA)
for (i in c("num_","pct_","num_spell_", "longest_spell_")){
  for (j in c("hs_completed","clg_enrolled")){
    fit <- with(data = new_imp, exp = glm(get(j) ~ get(paste0(i,"nwp")) + get(paste0(i,"incp")) + is_first_born + p_gender + n_children_gt3_prop  + hh_age_gt35_entry + hh_racehisp_entry + hh_educ_entry + hh_marital_entry + h_gender_f_prop + poorhealth_hh_prop + with_gp_true_prop, family = binomial, weights = pwt))
    reg_temp <- summary(pool(fit)) %>%
      mutate(
        estimate_star = case_when(
          p.value < 0.1 & p.value >= 0.05 ~ paste(as.character(round(estimate,3)), "*", sep = ""),
          p.value < 0.05 & p.value >= 0.01 ~ paste(as.character(round(estimate,3)), "**", sep = ""),
          p.value < 0.01 & p.value >= 0 ~ paste(as.character(round(estimate,3)), "***", sep = ""),
          TRUE ~ as.character(round(estimate,3))
        ),
        stdev = paste0("(",round(std.error, 3),")")
      ) %>%
      select(term, estimate_star, stdev)
    reg_summary <- reg_summary %>%
      full_join(reg_temp, by = "term", suffix = c("",i))
  }
}
write.csv(reg_summary, file = "temp.csv")

# Table 4. (Clustered) Income Mirror ----
reg_summary <- data.frame(term = NA, name = NA)
for (i in c("num_","pct_","num_spell_", "longest_spell_","ever_")){
  for (j in c("hs_completed","clg_enrolled")){
    fit <- with(datlist, glm(formula = get(j) ~ get(paste0(i,"nwp")) + get(paste0(i,"incp")) + is_first_born + p_gender + n_children_gt3_prop  + hh_age_gt35_entry + hh_racehisp_entry + hh_educ_entry + hh_marital_entry + h_gender_f_prop + poorhealth_hh_prop + with_gp_true_prop, family = gaussian, weights = pwt))
    betas <- lapply(fit, coef)
    vars <- lapply(fit, FUN = function(x){vcovCL(x, cluster = datlist$imputations[[1]]$indfid)}) #vcovCL is in the sandwich package
    reg_temp <- summary(pool_mi(betas, vars)) %>%
      mutate(
        estimate_star = case_when(
          p < 0.1 & p >= 0.05 ~ paste(as.character(rmlead0(results,4)), "*", sep = ""),
          p < 0.05 & p >= 0.01 ~ paste(as.character(rmlead0(results,4)), "**", sep = ""),
          p < 0.01 & p >= 0 ~ paste(as.character(rmlead0(results,4)), "***", sep = ""),
          TRUE ~ as.character(rmlead0(results,4))
        ),
        stdev = paste0("(",rmlead0(se,4),")")
      ) %>%
      select(estimate_star, stdev)
    reg_temp$term <- rownames(reg_temp)
    reg_temp <- reg_temp |> pivot_longer(cols = c("estimate_star","stdev"))
    reg_temp <- as.data.frame(reg_temp)
    reg_summary <- reg_summary %>%
      full_join(reg_temp, by = c("term","name"), suffix = c("",paste0(i,j)))
  }
}

# write.csv(reg_summary, file = "temp.csv")

write.table(reg_summary, "temp.txt", sep="\t", row.names=FALSE, col.names=TRUE, quote=TRUE)




# Table 5. Income Mirror, by age ----

try <- temp |> 
  group_by(.imp, consec_nwp, consec_incp) |> 
  summarise(
    wtd_count = sum(pwt)
  ) |> 
  group_by(.imp) |> 
  mutate(wtd_pct = wtd_count/sum(wtd_count)) |> 
  group_by(consec_nwp, consec_incp) |> 
  summarise(pooled_wtd_pct = mean(wtd_pct)) |> 
  pivot_wider(
    values_from = pooled_wtd_pct,
    names_from = consec_incp,
    names_prefix = "incp_"
  )

reg_summary <- data.frame(term = NA)
for (j in c("hs_completed","clg_enrolled")){
  fit <- with(data = new_imp, exp = glm(get(j) ~ nwp_pct0to6 + nwp_pct6to12 + nwp_pct12to18 + incp_pct0to6 + incp_pct6to12 + incp_pct12to18 + is_first_born + p_gender + n_children_gt3_prop  + hh_age_gt35_entry + hh_racehisp_entry + hh_educ_entry + hh_marital_entry + h_gender_f_prop + poorhealth_hh_prop + with_gp_true_prop, family = binomial, weights = pwt))
  reg_temp <- summary(pool(fit)) %>%
    mutate(
      estimate_star = case_when(
        p.value < 0.1 & p.value >= 0.05 ~ paste(as.character(round(estimate,3)), "*", sep = ""),
        p.value < 0.05 & p.value >= 0.01 ~ paste(as.character(round(estimate,3)), "**", sep = ""),
        p.value < 0.01 & p.value >= 0 ~ paste(as.character(round(estimate,3)), "***", sep = ""),
        TRUE ~ as.character(round(estimate,3))
      ),
      stdev = paste0("(",round(std.error, 3),")")
    ) %>%
    select(term, estimate_star, stdev)
  reg_summary <- reg_summary %>%
    full_join(reg_temp, by = "term", suffix = c("",j))
}
write.csv(reg_summary, file = "temp.csv")

reg_summary <- data.frame(term = NA)
for (j in c("hs_completed","clg_enrolled")){
  fit <- with(data = new_imp, exp = glm(get(j) ~ nwp_share0to6 + nwp_share6to12 + nwp_share12to18 + incp_share0to6 + incp_share6to12 + incp_share12to18+ is_first_born + p_gender + n_children_gt3_prop  + hh_age_gt35_entry + hh_racehisp_entry + hh_educ_entry + hh_marital_entry + h_gender_f_prop + poorhealth_hh_prop + with_gp_true_prop, family = binomial, weights = pwt))
  reg_temp <- summary(pool(fit)) %>%
    mutate(
      estimate_star = case_when(
        p.value < 0.1 & p.value >= 0.05 ~ paste(as.character(round(estimate,3)), "*", sep = ""),
        p.value < 0.05 & p.value >= 0.01 ~ paste(as.character(round(estimate,3)), "**", sep = ""),
        p.value < 0.01 & p.value >= 0 ~ paste(as.character(round(estimate,3)), "***", sep = ""),
        TRUE ~ as.character(round(estimate,3))
      ),
      stdev = paste0("(",round(std.error, 3),")")
    ) %>%
    select(term, estimate_star, stdev)
  reg_summary <- reg_summary %>%
    full_join(reg_temp, by = "term", suffix = c("",j))
}
write.csv(reg_summary, file = "temp.csv")

# Table 5. (Clustered) Income Mirro, by age ----
reg_summary <- data.frame(term = NA, name = NA)

for (j in c("hs_completed","clg_enrolled")){
  fit <- with(datlist, glm(formula = get(j) ~ nwp_pct0to6 + nwp_pct6to12 + nwp_pct12to18 + incp_pct0to6 + incp_pct6to12 + incp_pct12to18 + is_first_born + p_gender + n_children_gt3_prop  + hh_age_gt35_entry + hh_racehisp_entry + hh_educ_entry + hh_marital_entry + h_gender_f_prop + poorhealth_hh_prop + with_gp_true_prop, family = gaussian, weights = pwt))
  betas <- lapply(fit, coef)
  vars <- lapply(fit, FUN = function(x){vcovCL(x, cluster = datlist$imputations[[1]]$indfid)}) #vcovCL is in the sandwich package
  reg_temp <- summary(pool_mi(betas, vars)) %>%
    mutate(
      estimate_star = case_when(
        p < 0.1 & p >= 0.05 ~ paste(as.character(rmlead0(results,4)), "*", sep = ""),
        p < 0.05 & p >= 0.01 ~ paste(as.character(rmlead0(results,4)), "**", sep = ""),
        p < 0.01 & p >= 0 ~ paste(as.character(rmlead0(results,4)), "***", sep = ""),
        TRUE ~ as.character(rmlead0(results,4))
      ),
      stdev = paste0("(",rmlead0(se,4),")")
    ) %>%
    select(estimate_star, stdev)
  reg_temp$term <- rownames(reg_temp)
  reg_temp <- reg_temp |> pivot_longer(cols = c("estimate_star","stdev"))
  reg_temp <- as.data.frame(reg_temp)
  reg_summary <- reg_summary %>%
    full_join(reg_temp, by = c("term","name"), suffix = c("",paste0(j)))
}


# write.csv(reg_summary, file = "temp.csv")

write.table(reg_summary, "temp.txt", sep="\t", row.names=FALSE, col.names=TRUE, quote=TRUE)

reg_summary <- data.frame(term = NA, name = NA)
for (j in c("hs_completed","clg_enrolled")){
  fit <- with(datlist, glm(formula = get(j) ~ nwp_share0to6 + nwp_share6to12 + nwp_share12to18 + incp_share0to6 + incp_share6to12 + incp_share12to18 + is_first_born + p_gender + n_children_gt3_prop  + hh_age_gt35_entry + hh_racehisp_entry + hh_educ_entry + hh_marital_entry + h_gender_f_prop + poorhealth_hh_prop + with_gp_true_prop, family = gaussian, weights = pwt))
  betas <- lapply(fit, coef)
  vars <- lapply(fit, FUN = function(x){vcovCL(x, cluster = datlist$imputations[[1]]$indfid)}) #vcovCL is in the sandwich package
  reg_temp <- summary(pool_mi(betas, vars)) %>%
    mutate(
      estimate_star = case_when(
        p < 0.1 & p >= 0.05 ~ paste(as.character(rmlead0(results,4)), "*", sep = ""),
        p < 0.05 & p >= 0.01 ~ paste(as.character(rmlead0(results,4)), "**", sep = ""),
        p < 0.01 & p >= 0 ~ paste(as.character(rmlead0(results,4)), "***", sep = ""),
        TRUE ~ as.character(rmlead0(results,4))
      ),
      stdev = paste0("(",rmlead0(se,4),")")
    ) %>%
    select(estimate_star, stdev)
  reg_temp$term <- rownames(reg_temp)
  reg_temp <- reg_temp |> pivot_longer(cols = c("estimate_star","stdev"))
  reg_temp <- as.data.frame(reg_temp)
  reg_summary <- reg_summary %>%
    full_join(reg_temp, by = c("term","name"), suffix = c("",paste0(j)))
}


# write.csv(reg_summary, file = "temp.csv")

write.table(reg_summary, "temp.txt", sep="\t", row.names=FALSE, col.names=TRUE, quote=TRUE)


# Table 5. (Revised Share) Income mirror ----
datlist <- miceadds::mids2datlist(new_imp)
# Conditional on those who ever experienced poverty for at least one wave
datlist_sub  <- subset_datlist(datlist,  expr_subset=expression(nwp_share0to6 + nwp_share6to12 + nwp_share12to18 > 0 & incp_share0to6 + incp_share6to12 + incp_share12to18 > 0) )

reg_summary <- data.frame(term = NA, name = NA)
for (j in c("hs_completed","clg_enrolled")){
  fit <- with(datlist, glm(formula = get(j) ~ nwp_share6to12 + nwp_share12to18 + incp_share6to12 + incp_share12to18 + is_first_born + p_gender + n_children_gt3_prop  + hh_age_gt35_entry + hh_racehisp_entry + hh_educ_entry + hh_marital_entry + h_gender_f_prop + poorhealth_hh_prop + with_gp_true_prop, family = gaussian, weights = pwt))
  betas <- lapply(fit, coef)
  vars <- lapply(fit, FUN = function(x){vcovCL(x, cluster = datlist$imputations[[1]]$indfid)}) #vcovCL is in the sandwich package
  reg_temp <- summary(pool_mi(betas, vars)) %>%
    mutate(
      estimate_star = case_when(
        p < 0.1 & p >= 0.05 ~ paste(as.character(rmlead0(results,4)), "*", sep = ""),
        p < 0.05 & p >= 0.01 ~ paste(as.character(rmlead0(results,4)), "**", sep = ""),
        p < 0.01 & p >= 0 ~ paste(as.character(rmlead0(results,4)), "***", sep = ""),
        TRUE ~ as.character(rmlead0(results,4))
      ),
      stdev = paste0("(",rmlead0(se,4),")")
    ) %>%
    select(estimate_star, stdev)
  reg_temp$term <- rownames(reg_temp)
  reg_temp <- reg_temp |> pivot_longer(cols = c("estimate_star","stdev"))
  reg_temp <- as.data.frame(reg_temp)
  reg_summary <- reg_summary %>%
    full_join(reg_temp, by = c("term","name"), suffix = c("",paste0(j)))
}


# write.csv(reg_summary, file = "temp.csv")

write.table(reg_summary, "temp.txt", sep="\t", row.names=FALSE, col.names=TRUE, quote=TRUE)





